*! prepare_lr_vars
*  Cleans the raw yearly extract currently in memory. It expects the generic
*  columns v2-v10 produced by the import step and:
*    - renames v2 -> price, v4 -> postcode, v8 -> paon, v9 -> saon, v10 -> street
*    - creates 0/1 flags  new (v6 == "Y")  and  lease (v7 == "L")
*    - creates dated (daily), datem (monthly), dateq (quarterly) and year from v3
*    - drops v3 v5 v6 v7
*  v1 is left untouched.
*  Dropping any earlier definition first lets the do-file be re-run in the same
*  session without an "already defined" error.
capture program drop prepare_lr_vars
program define prepare_lr_vars
	rename v2 price
	rename v4 postcode

	* Indicator variables; byte is enough for 0/1 values
	gen byte new   = v6 == "Y"
	gen byte lease = v7 == "L"


	*--- Dates
	* clock() returns milliseconds since 01jan1960, which must be held in a
	* double: in a float the value is rounded (spacing of 131072 ms at this
	* magnitude), so a midnight timestamp can slip into the previous day and
	* dofc() would then return the wrong date.
	tempvar stamp
	gen double `stamp' = clock(v3, "YMDhm")
	gen long dated = dofc(`stamp')
	format dated %td

	gen datem = mofd(dated)
	format datem %tm
	gen dateq = qofd(dated)
	format dateq %tq
	gen year = year(dated)


	*--- Units for repeat sales indices
	rename v8 paon
	rename v9 saon
	rename v10 street

	drop v3 v5 v6 v7
end



*--- 1) Prepare one-year files

* For each year 2013-2020: read the first ten columns of that year's CSV,
* clean them with prepare_lr_vars and save the result as landreg_<year>.
forvalues i = 2013/2020 {
	* varnames(nonames): the rest of the script addresses the columns as
	* v1-v10, so the first row must always be read as data rather than
	* left to header auto-detection.
	import delimited ///
		"${datadir_landreg}\land-registry-2020-12-19\pp-`i'.csv", ///
		colrange(1:10) varnames(nonames) clear

	prepare_lr_vars
	
	save "${datadir}\landreg_`i'", replace
}



*--- 2) Append files together

* Load the first yearly file, then stack each later year underneath it
use "${datadir}\landreg_2013", clear
foreach yr of numlist 2014/2020 {
	append using "${datadir}\landreg_`yr'"
}

rename v1 transaction_id

* unit_id: one integer per distinct (postcode, paon, saon, street) combination.
* With the missing option, missing values count as a category of their own
* instead of producing a missing unit_id.
egen long unit_id = group(postcode paon saon street), missing

* The address components are dropped once the identifier exists
drop paon saon street

save "${datadir}\landreg_2013-2020", replace



*--- 3) Keep only the properties with a sale flagged as new

* This step works on the appended data still in memory. To run it on its own,
* load the saved file first:
*use "${datadir}\landreg_2013-2020", clear

* prevnew is 1 on every record of a unit that has at least one sale with
* new == 1, and 0 otherwise; all sales of those units are kept.
* NOTE(review): this does not check that the new sale is the unit's first
* recorded sale -- confirm that is the intended selection.
bysort unit_id: egen prevnew = max(new)
keep if prevnew

// preparation for future matches: postcode with every blank removed.
// subinstr(..., .) strips all blanks (leading, internal, trailing), whereas
// removing only the first blank would leave the internal one in place
// whenever the postcode starts with a blank.
gen pcd = subinstr(postcode, " ", "", .)

save "${datadir}\landreg_2013-2020_prevnew", replace
